The heatmap of the
data
numsub <- nrow(dataframe)
if (numsub > 1000) numsub <- 1000
if (!largeSet)
{
hm <- heatMaps(data=dataframeScaled[1:numsub,],
Outcome=outcome,
Scale=TRUE,
hCluster = "row",
xlab="Feature",
ylab="Sample",
srtCol=45,
srtRow=45,
cexCol=cexheat,
cexRow=cexheat
)
par(op)
}
Correlation
Matrix of the Data
The heat map of the data
if (!largeSet)
{
par(cex=0.6,cex.main=0.85,cex.axis=0.7)
#cormat <- Rfast::cora(as.matrix(dataframe[,varlist]),large=TRUE)
cormat <- cor(dataframe[,varlist],method="pearson")
cormat[is.na(cormat)] <- 0
gplots::heatmap.2(abs(cormat),
trace = "none",
# scale = "row",
mar = c(5,5),
col=rev(heat.colors(5)),
main = "Original Correlation",
cexRow = cexheat,
cexCol = cexheat,
srtCol=45,
srtRow=45,
key.title=NA,
key.xlab="|Pearson Correlation|",
xlab="Feature", ylab="Feature")
diag(cormat) <- 0
print(max(abs(cormat)))
}
The
decorrelation
DEdataframe <- IDeA(dataframe,verbose=TRUE,thr=thro)
#>
#> g1int824 g4D02 g1int718 g10E08 g1int23 g5B03
#> g2E09 g7F07 g1A01 g3C09 g3H08 g1A08
#> 0.5669535 0.6771084 0.8096386 0.2306368 0.2034423 0.5913941
#>
#> Included: 2905 , Uni p: 5.163511e-05 , Base Size: 14 , Rcrit: 0.2950683
#>
#>
1 <R=0.857,thr=0.950>, Top: 6< 5 >[Fa= 6 ]( 6 , 14 , 0 ),<|><>Tot Used: 20 , Added: 14 , Zero Std: 0 , Max Cor: 0.952
#>
2 <R=0.855,thr=0.950>, Top: 1< 1 >[Fa= 7 ]( 1 , 1 , 6 ),<|><>Tot Used: 22 , Added: 1 , Zero Std: 0 , Max Cor: 0.950
#>
3 <R=0.855,thr=0.900>, Top: 51< 3 >[Fa= 54 ]( 51 , 82 , 7 ),<|><>Tot Used: 149 , Added: 82 , Zero Std: 0 , Max Cor: 0.914
#>
4 <R=0.845,thr=0.900>, Top: 3< 1 >[Fa= 57 ]( 3 , 3 , 54 ),<|><>Tot Used: 155 , Added: 3 , Zero Std: 0 , Max Cor: 0.900
#>
5 <R=0.845,thr=0.800>, Top: 204< 9 >..[Fa= 232 ]( 203 , 460 , 57 ),<|><>Tot Used: 777 , Added: 460 , Zero Std: 0 , Max Cor: 0.897
#>
6 <R=0.834,thr=0.800>, Top: 42< 13 >[Fa= 271 ]( 42 , 74 , 232 ),<|><>Tot Used: 887 , Added: 74 , Zero Std: 0 , Max Cor: 0.864
#>
7 <R=0.840,thr=0.800>, Top: 5< 3 >[Fa= 276 ]( 5 , 9 , 271 ),<|><>Tot Used: 901 , Added: 9 , Zero Std: 0 , Max Cor: 0.800
#>
8 <R=0.800,thr=0.800>
#>
[ 8 ], 0.7999747 Decor Dimension: 901 Nused: 901 . Cor to Base: 618 , ABase: 2905 , Outcome Base: 0
#>
varlistc <- colnames(DEdataframe)[colnames(DEdataframe) != outcome]
pander::pander(sum(apply(dataframe[,varlist],2,var)))
61.1
pander::pander(sum(apply(DEdataframe[,varlistc],2,var)))
46.4
pander::pander(entropy(discretize(unlist(dataframe[,varlist]), 256)))
3
pander::pander(entropy(discretize(unlist(DEdataframe[,varlistc]), 256)))
2.85
varratio <- attr(DEdataframe,"VarRatio")
pander::pander(tail(varratio))
| 0.0602 |
0.0568 |
0.0551 |
0.0548 |
0.0401 |
0.0344 |
The decorrelation
matrix
if (!largeSet)
{
par(cex=0.6,cex.main=0.85,cex.axis=0.7)
UPLTM <- attr(DEdataframe,"UPLTM")
gplots::heatmap.2(1.0*(abs(UPLTM)>0),
trace = "none",
mar = c(5,5),
col=rev(heat.colors(5)),
main = "Decorrelation matrix",
cexRow = cexheat,
cexCol = cexheat,
srtCol=45,
srtRow=45,
key.title=NA,
key.xlab="|Beta|>0",
xlab="Output Feature", ylab="Input Feature")
par(op)
}
Formulas
Network
Displaying the features associations
par(op)
clustable <- c("To many variables")
transform <- attr(DEdataframe,"UPLTM") != 0
tnames <- colnames(transform)
colnames(transform) <- str_remove_all(colnames(transform),"La_")
transform <- abs(transform*cor(dataframe[,rownames(transform)])) # The weights are proportional to the observed correlation
fscore <- attr(DEdataframe,"fscore")
VertexSize <- fscore # The size depends on the variable independence relevance (fscore)
names(VertexSize) <- str_remove_all(names(VertexSize),"La_")
VertexSize <- 10*(VertexSize-min(VertexSize))/(max(VertexSize)-min(VertexSize)) # Normalization
VertexSize <- VertexSize[rownames(transform)]
rsum <- apply(1*(transform !=0),1,sum) + 0.01*VertexSize + 0.001*varratio[tnames]
csum <- apply(1*(transform !=0),2,sum) + 0.01*VertexSize + 0.001*varratio[tnames]
ntop <- min(10,length(rsum))
topfeatures <- unique(c(names(rsum[order(-rsum)])[1:ntop],names(csum[order(-csum)])[1:ntop]))
rtrans <- transform[topfeatures,]
csum <- (apply(1*(rtrans !=0),2,sum) > 1*(colnames(rtrans) %in% topfeatures))
rtrans <- rtrans[,csum]
topfeatures <- unique(c(topfeatures,colnames(rtrans)))
print(ncol(transform))
[1] 901
transform <- transform[topfeatures,topfeatures]
print(ncol(transform))
[1] 164
if (ncol(transform)>100)
{
csum <- apply(1*(transform !=0),1,sum)
csum <- csum[csum > 1]
csum <- csum + 0.01*VertexSize[names(csum)]
csum <- csum[order(-csum)]
tpsum <- min(20,length(csum))
trsum <- rownames(transform)[rownames(transform) %in% names(csum[1:tpsum])]
rtrans <- transform[trsum,]
topfeatures <- unique(c(rownames(rtrans),colnames(rtrans)))
transform <- transform[topfeatures,topfeatures]
if (nrow(transform) > 150)
{
csum <- apply(1*(rtrans != 0 ),2,sum)
csum <- csum + 0.01*VertexSize[names(csum)]
csum <- csum[order(-csum)]
tpsum <- min(130,length(csum))
csum <- rownames(transform)[rownames(transform) %in% names(csum[1:tpsum])]
csum <- unique(c(trsum,csum))
transform <- transform[csum,csum]
}
print(ncol(transform))
}
[1] 130
if (ncol(transform) < 150)
{
gplots::heatmap.2(transform,
trace = "none",
mar = c(5,5),
col=rev(heat.colors(5)),
main = "Red Decorrelation matrix",
cexRow = cexheat,
cexCol = cexheat,
srtCol=45,
srtRow=45,
key.title=NA,
key.xlab="|Beta|>0",
xlab="Output Feature", ylab="Input Feature")
par(op)
VertexSize <- VertexSize[colnames(transform)]
gr <- graph_from_adjacency_matrix(transform,mode = "directed",diag = FALSE,weighted=TRUE)
gr$layout <- layout_with_fr
# fc <- cluster_optimal(gr)
fc <- cluster_walktrap (gr,steps=50)
plot(fc, gr,
edge.width = 2*E(gr)$weight,
vertex.size=VertexSize,
edge.arrow.size=0.5,
edge.arrow.width=0.5,
vertex.label.cex=(0.15+0.05*VertexSize),
vertex.label.dist=0.5 + 0.05*VertexSize,
main="Top Feature Association")
varratios <- varratio
fscores <- fscore
names(varratios) <- str_remove_all(names(varratios),"La_")
names(fscores) <- str_remove_all(names(fscores),"La_")
dc <- getLatentCoefficients(DEdataframe)
theCharformulas <- attr(dc,"LatentCharFormulas")
clustable <- as.data.frame(cbind(Variable=fc$names,
Formula=as.character(theCharformulas[paste("La_",fc$names,sep="")]),
Class=fc$membership,
ResidualVariance=round(varratios[fc$names],3),
Fscore=round(fscores[fc$names],3)
)
)
rownames(clustable) <- str_replace_all(rownames(clustable),"__","_")
clustable$Variable <- NULL
clustable$Class <- as.integer(clustable$Class)
clustable$ResidualVariance <- as.numeric(clustable$ResidualVariance)
clustable$Fscore <- as.numeric(clustable$Fscore)
clustable <- clustable[order(-clustable$Fscore),]
clustable <- clustable[order(clustable$Class),]
clustable <- clustable[clustable$Fscore >= -1,]
topv <- min(50,nrow(clustable))
clustable <- clustable[1:topv,]
}


pander::pander(clustable)
| g4D02 |
NA |
1 |
1.000 |
12 |
| g1int118 |
- (0.738)g4D02 + g1int118 |
1 |
0.351 |
2 |
| g1int105 |
+ g1int105 - (0.729)g4D02 |
1 |
0.310 |
-1 |
| g1int106 |
+ g1int106 - (0.700)g4D02 |
1 |
0.321 |
-1 |
| g1int107 |
+ g1int107 - (0.882)g4D02 |
1 |
0.275 |
-1 |
| g1int108 |
+ g1int108 - (0.798)g4D02 |
1 |
0.332 |
-1 |
| g4H01 |
+ g4H01 - (0.769)g4D02 |
1 |
0.167 |
-1 |
| g4A02 |
+ g4A02 - (0.873)g4D02 |
1 |
0.107 |
-1 |
| g4B02 |
- (1.046)g4D02 + g4B02 |
1 |
0.060 |
-1 |
| g1int109 |
- (0.795)g4D02 + g1int109 |
1 |
0.184 |
-1 |
| g1int110 |
- (0.551)g4D02 + g1int110 |
1 |
0.360 |
-1 |
| g1int111 |
- (0.886)g4D02 + g1int111 |
1 |
0.296 |
-1 |
| g1int112 |
- (0.627)g4D02 + g1int112 |
1 |
0.344 |
-1 |
| g1G04 |
+ g1G04 - (1.139)g1int118 |
1 |
0.145 |
-1 |
| g4E02 |
+ g4E02 - (0.953)g1int118 |
1 |
0.126 |
-1 |
| g1CNS316 |
- (0.961)g1int118 + g1CNS316 |
1 |
0.160 |
-1 |
| g11G02 |
NA |
2 |
1.000 |
10 |
| g11E03 |
- (0.738)g11G02 + g11E03 |
2 |
0.278 |
1 |
| g4E01 |
+ g4E01 - (0.670)g11G02 |
2 |
0.315 |
0 |
| g11E02 |
+ g11E02 - (0.642)g11G02 |
2 |
0.346 |
-1 |
| g11B02 |
+ g11B02 - (0.787)g11G02 |
2 |
0.262 |
-1 |
| g1int77 |
+ g1int77 - (0.624)g11G02 |
2 |
0.354 |
-1 |
| g7F01 |
+ g7F01 - (0.687)g11G02 |
2 |
0.318 |
-1 |
| g1D08 |
+ g1D08 - (0.967)g4E01 |
2 |
0.137 |
-1 |
| g9D05 |
+ g9D05 - (0.546)g11G02 |
2 |
0.324 |
-1 |
| g11A03 |
+ g11A03 - (0.885)g11E03 |
2 |
0.148 |
-1 |
| g11H02 |
+ g11H02 - (0.748)g11G02 |
2 |
0.229 |
-1 |
| g11F02 |
- (0.941)g11G02 + g11F02 |
2 |
0.123 |
-1 |
| g1int85 |
- (0.653)g11G02 + g1int85 |
2 |
0.332 |
-1 |
| g11C03 |
- (0.863)g11E03 + g11C03 |
2 |
0.180 |
-1 |
| g4H02 |
NA |
3 |
1.000 |
17 |
| g1int120 |
+ g1int120 - (0.802)g4H02 |
3 |
0.329 |
0 |
| g4F02 |
+ g4F02 - (0.845)g4H02 |
3 |
0.282 |
-1 |
| g1int121 |
+ g1int121 - (0.982)g1int120 |
3 |
0.189 |
-1 |
| g1int123 |
+ g1int123 - (0.734)g4H02 |
3 |
0.288 |
-1 |
| g1CNS124 |
+ g1CNS124 - (0.559)g4H02 |
3 |
0.344 |
-1 |
| g1CNS201 |
+ g1CNS201 - (0.762)g4H02 |
3 |
0.346 |
-1 |
| g1CNS236 |
+ g1CNS236 - (0.915)g4H02 |
3 |
0.195 |
-1 |
| g1CNS219 |
+ g1CNS219 - (0.918)g4H02 |
3 |
0.169 |
-1 |
| g1int126 |
+ g1int126 - (0.841)g4H02 |
3 |
0.278 |
-1 |
| g3D03 |
+ g3D03 - (0.960)g4H02 |
3 |
0.156 |
-1 |
| g1CNS421 |
+ g1CNS421 - (0.776)g4H02 |
3 |
0.241 |
-1 |
| g1int127 |
+ g1int127 - (0.890)g4H02 |
3 |
0.280 |
-1 |
| g2B09 |
+ g2B09 - (0.942)g4H02 |
3 |
0.274 |
-1 |
| g4A03 |
- (0.899)g4H02 + g4A03 |
3 |
0.172 |
-1 |
| g1CNS465 |
- (1.071)g4H02 + g1CNS465 |
3 |
0.168 |
-1 |
| g1int129 |
- (0.951)g4H02 + g1int129 |
3 |
0.340 |
-1 |
| g7B08 |
- (0.683)g4H02 + g7B08 |
3 |
0.312 |
-1 |
| g1CNS159 |
NA |
4 |
1.000 |
11 |
| g1int846 |
+ g1int846 - (0.935)g1CNS159 |
4 |
0.191 |
1 |
par(op)
U-MAP Visualization
of features
The UMAP on Raw
Data
classes <- unique(dataframe[1:numsub,outcome])
raincolors <- rainbow(length(classes))
names(raincolors) <- classes
topvars <- univariate_BinEnsemble(dataframe,outcome)
lso <- LASSO_MIN(formula(paste(outcome,"~.")),dataframe,family="binomial")
topvars <- unique(c(names(topvars),lso$selectedfeatures))
pander::pander(head(topvars))
g1CNS507, g1CNS382, g1CNS91,
g1CNS105, g1CNS26 and g1int804
# names(topvars)
#if (nrow(dataframe) < 1000)
#{
datasetframe.umap = umap(scale(dataframe[1:numsub,topvars]),n_components=2)
# datasetframe.umap = umap(dataframe[1:numsub,varlist],n_components=2)
plot(datasetframe.umap$layout,xlab="U1",ylab="U2",main="UMAP: Original",t='n')
text(datasetframe.umap$layout,labels=dataframe[1:numsub,outcome],col=raincolors[dataframe[1:numsub,outcome]+1])

#}
The decorralted
UMAP
varlistcV <- names(varratio[varratio >= 0.01])
topvars <- univariate_BinEnsemble(DEdataframe[,varlistcV],outcome)
lso <- LASSO_MIN(formula(paste(outcome,"~.")),DEdataframe[,varlistcV],family="binomial")
topvars <- unique(c(names(topvars),lso$selectedfeatures))
pander::pander(head(topvars))
g1CNS507, g1int340, g1int812,
g9E01, g8D02 and g1CNS29
varlistcV <- varlistcV[varlistcV != outcome]
# DEdataframe[,outcome] <- as.numeric(DEdataframe[,outcome])
#if (nrow(dataframe) < 1000)
#{
datasetframe.umap = umap(scale(DEdataframe[1:numsub,topvars]),n_components=2)
# datasetframe.umap = umap(DEdataframe[1:numsub,varlistcV],n_components=2)
plot(datasetframe.umap$layout,xlab="U1",ylab="U2",main="UMAP: After ILAA",t='n')
text(datasetframe.umap$layout,labels=DEdataframe[1:numsub,outcome],col=raincolors[DEdataframe[1:numsub,outcome]+1])

#}
Univariate
Analysis
Univariate
univarRAW <- uniRankVar(varlist,
paste(outcome,"~1"),
outcome,
dataframe,
rankingTest="AUC")
100 : g1A07 200 : g3H03 300 : g1E07 400 : g4F03 500 : g4C05
600 : g1int292 700 : g1int356 800 : g7A03 900 : g4D11 1000 :
g1int577
1100 : g1CNS420 1200 : g7G07 1300 : g1int785 1400 : g1CNS59 1500 :
g1CNS178
1600 : g1int949 1700 : g1int1028 1800 : g1int1089 1900 : g11D05 2000 :
g1int1222
2100 : g1int1298 2200 : g1int1376 2300 : g1int1449 2400 : g10E08 2500 :
g1CNS90
2600 : g7F11 2700 : g1int1693 2800 : g1CNS93 2900 : g1int1800
univarDe <- uniRankVar(varlistc,
paste(outcome,"~1"),
outcome,
DEdataframe,
rankingTest="AUC",
)
100 : g1A07 200 : g3H03 300 : La_g1E07 400 : g4F03 500 : g4C05
600 : La_g1int292 700 : g1int356 800 : g7A03 900 : g4D11 1000 :
g1int577
1100 : g1CNS420 1200 : La_g7G07 1300 : g1int785 1400 : La_g1CNS59 1500 :
g1CNS178
1600 : g1int949 1700 : La_g1int1028 1800 : La_g1int1089 1900 : g11D05
2000 : g1int1222
2100 : g1int1298 2200 : g1int1376 2300 : La_g1int1449 2400 : g10E08 2500
: g1CNS90
2600 : g7F11 2700 : g1int1693 2800 : La_g1CNS93 2900 : g1int1800
Final Table
univariate_columns <- c("caseMean","caseStd","controlMean","controlStd","controlKSP","ROCAUC")
##top variables
topvar <- c(1:length(varlist)) <= TopVariables
tableRaw <- univarRAW$orderframe[topvar,univariate_columns]
pander::pander(tableRaw)
| g1CNS507 |
-0.250 |
0.222 |
-0.0248 |
0.195 |
0.075349 |
0.796 |
| g1CNS105 |
0.254 |
0.270 |
0.0533 |
0.172 |
0.009926 |
0.749 |
| g1CNS382 |
-0.171 |
0.173 |
-0.0354 |
0.145 |
0.045504 |
0.745 |
| g1int804 |
-0.250 |
0.220 |
-0.0775 |
0.205 |
0.020286 |
0.743 |
| g1CNS91 |
0.310 |
0.294 |
0.0994 |
0.138 |
0.003638 |
0.742 |
| g1CNS26 |
-0.145 |
0.169 |
-0.0101 |
0.125 |
0.657242 |
0.738 |
| g1int340 |
-0.157 |
0.183 |
-0.0169 |
0.139 |
0.028840 |
0.737 |
| g1CNS70 |
0.152 |
0.177 |
0.0352 |
0.115 |
0.022558 |
0.731 |
| g1CNS158 |
0.260 |
0.251 |
0.1007 |
0.167 |
0.000401 |
0.731 |
| g1CNS28 |
0.218 |
0.218 |
0.0744 |
0.143 |
0.001961 |
0.726 |
topLAvar <- univarDe$orderframe$Name[str_detect(univarDe$orderframe$Name,"La_")]
topLAvar <- unique(c(univarDe$orderframe$Name[topvar],topLAvar[1:as.integer(TopVariables/2)]))
finalTable <- univarDe$orderframe[topLAvar,univariate_columns]
pander::pander(finalTable)
| g1CNS507 |
-0.249546 |
0.2224 |
-0.024786 |
0.1946 |
0.075349 |
0.796 |
| g1int340 |
-0.157144 |
0.1829 |
-0.016911 |
0.1391 |
0.028840 |
0.737 |
| g8D02 |
-0.086458 |
0.1772 |
0.027399 |
0.1370 |
0.019618 |
0.725 |
| g1CNS159 |
0.192334 |
0.2488 |
0.046005 |
0.1735 |
0.000826 |
0.723 |
| g1int1671 |
0.126334 |
0.1404 |
0.039686 |
0.1218 |
0.099178 |
0.719 |
| g9E01 |
0.091105 |
0.1633 |
-0.004645 |
0.0984 |
0.039398 |
0.717 |
| g1int812 |
-0.192608 |
0.2140 |
-0.046220 |
0.1755 |
0.235071 |
0.716 |
| g1CNS74 |
0.124918 |
0.1377 |
0.039057 |
0.1157 |
0.163757 |
0.716 |
| g8F04 |
0.022967 |
0.0898 |
-0.046750 |
0.0893 |
0.805754 |
0.710 |
| g5G03 |
-0.113922 |
0.1569 |
-0.014816 |
0.1450 |
0.040736 |
0.706 |
| La_g1int829 |
0.027457 |
0.0662 |
-0.018160 |
0.0654 |
0.686753 |
0.700 |
| La_g4F01 |
0.038348 |
0.0752 |
-0.000825 |
0.0780 |
0.311967 |
0.677 |
| La_g1int1102 |
-0.075044 |
0.0969 |
-0.020829 |
0.0888 |
0.626136 |
0.674 |
| La_g6H03 |
0.000405 |
0.1480 |
-0.045865 |
0.0647 |
0.370420 |
0.667 |
| La_g1CNS91 |
0.081214 |
0.1814 |
0.012656 |
0.0845 |
0.130390 |
0.667 |
dc <- getLatentCoefficients(DEdataframe)
fscores <- attr(DEdataframe,"fscore")
pander::pander(c(mean=mean(sapply(dc,length)),total=length(dc),fraction=length(dc)/(ncol(dataframe)-1)))
theCharformulas <- attr(dc,"LatentCharFormulas")
topvar <- rownames(tableRaw)
finalTable <- rbind(finalTable,tableRaw[topvar[!(topvar %in% topLAvar)],univariate_columns])
orgnamez <- rownames(finalTable)
orgnamez <- str_remove_all(orgnamez,"La_")
finalTable$RAWAUC <- univarRAW$orderframe[orgnamez,"ROCAUC"]
finalTable$DecorFormula <- theCharformulas[rownames(finalTable)]
finalTable$fscores <- fscores[rownames(finalTable)]
finalTable$varratio <- varratio[rownames(finalTable)]
Final_Columns <- c("DecorFormula","caseMean","caseStd","controlMean","controlStd","controlKSP","ROCAUC","RAWAUC","fscores","varratio")
finalTable <- finalTable[order(-finalTable$ROCAUC),]
pander::pander(finalTable[,Final_Columns])
| g1CNS507 |
NA |
-0.249546 |
0.2224 |
-0.024786 |
0.1946 |
0.075349 |
0.796 |
0.796 |
0 |
1.0000 |
| g1CNS105 |
NA |
0.254206 |
0.2698 |
0.053317 |
0.1720 |
0.009926 |
0.749 |
0.749 |
NA |
NA |
| g1CNS382 |
NA |
-0.171019 |
0.1731 |
-0.035377 |
0.1449 |
0.045504 |
0.745 |
0.745 |
NA |
NA |
| g1int804 |
NA |
-0.249725 |
0.2195 |
-0.077451 |
0.2051 |
0.020286 |
0.743 |
0.743 |
NA |
NA |
| g1CNS91 |
NA |
0.309879 |
0.2941 |
0.099384 |
0.1383 |
0.003638 |
0.742 |
0.742 |
NA |
NA |
| g1CNS26 |
NA |
-0.144676 |
0.1686 |
-0.010129 |
0.1253 |
0.657242 |
0.738 |
0.738 |
NA |
NA |
| g1int340 |
NA |
-0.157144 |
0.1829 |
-0.016911 |
0.1391 |
0.028840 |
0.737 |
0.737 |
0 |
1.0000 |
| g1CNS70 |
NA |
0.152301 |
0.1769 |
0.035188 |
0.1152 |
0.022558 |
0.731 |
0.731 |
NA |
NA |
| g1CNS158 |
NA |
0.259813 |
0.2505 |
0.100672 |
0.1674 |
0.000401 |
0.731 |
0.731 |
NA |
NA |
| g1CNS28 |
NA |
0.218429 |
0.2175 |
0.074444 |
0.1432 |
0.001961 |
0.726 |
0.726 |
NA |
NA |
| g8D02 |
NA |
-0.086458 |
0.1772 |
0.027399 |
0.1370 |
0.019618 |
0.725 |
0.725 |
0 |
1.0000 |
| g1CNS159 |
NA |
0.192334 |
0.2488 |
0.046005 |
0.1735 |
0.000826 |
0.723 |
0.723 |
11 |
1.0000 |
| g1int1671 |
NA |
0.126334 |
0.1404 |
0.039686 |
0.1218 |
0.099178 |
0.719 |
0.719 |
0 |
1.0000 |
| g9E01 |
NA |
0.091105 |
0.1633 |
-0.004645 |
0.0984 |
0.039398 |
0.717 |
0.717 |
0 |
1.0000 |
| g1int812 |
NA |
-0.192608 |
0.2140 |
-0.046220 |
0.1755 |
0.235071 |
0.716 |
0.716 |
12 |
1.0000 |
| g1CNS74 |
NA |
0.124918 |
0.1377 |
0.039057 |
0.1157 |
0.163757 |
0.716 |
0.716 |
0 |
1.0000 |
| g8F04 |
NA |
0.022967 |
0.0898 |
-0.046750 |
0.0893 |
0.805754 |
0.710 |
0.710 |
0 |
1.0000 |
| g5G03 |
NA |
-0.113922 |
0.1569 |
-0.014816 |
0.1450 |
0.040736 |
0.706 |
0.706 |
0 |
1.0000 |
| La_g1int829 |
- (1.024)g1int834 + g1int829 |
0.027457 |
0.0662 |
-0.018160 |
0.0654 |
0.686753 |
0.700 |
0.617 |
-1 |
0.0551 |
| La_g4F01 |
+ g4F01 - (0.613)g1int98 |
0.038348 |
0.0752 |
-0.000825 |
0.0780 |
0.311967 |
0.677 |
0.617 |
-1 |
0.3057 |
| La_g1int1102 |
- (0.868)g1int1101 + g1int1102 |
-0.075044 |
0.0969 |
-0.020829 |
0.0888 |
0.626136 |
0.674 |
0.644 |
-1 |
0.2744 |
| La_g6H03 |
+ g6H03 - (1.057)g1int1515 |
0.000405 |
0.1480 |
-0.045865 |
0.0647 |
0.370420 |
0.667 |
0.523 |
-1 |
0.1964 |
| La_g1CNS91 |
- (0.985)g5F04 + g1CNS91 |
0.081214 |
0.1814 |
0.012656 |
0.0845 |
0.130390 |
0.667 |
0.742 |
0 |
0.3255 |
Comparing ILAA vs
PCA vs EFA
PCA
featuresnames <- colnames(dataframe)[colnames(dataframe) != outcome]
pc <- prcomp(dataframe[,iscontinous],center = TRUE,scale. = TRUE,tol=0.01) #principal components
predPCA <- predict(pc,dataframe[,iscontinous])
PCAdataframe <- as.data.frame(cbind(predPCA,dataframe[,!iscontinous]))
colnames(PCAdataframe) <- c(colnames(predPCA),colnames(dataframe)[!iscontinous])
#plot(PCAdataframe[,colnames(PCAdataframe)!=outcome],col=dataframe[,outcome],cex=0.65,cex.lab=0.5,cex.axis=0.75,cex.sub=0.5,cex.main=0.75)
#pander::pander(pc$rotation)
PCACor <- cor(PCAdataframe[,colnames(PCAdataframe) != outcome])
gplots::heatmap.2(abs(PCACor),
trace = "none",
# scale = "row",
mar = c(5,5),
col=rev(heat.colors(5)),
main = "PCA Correlation",
cexRow = 0.5,
cexCol = 0.5,
srtCol=45,
srtRow= -45,
key.title=NA,
key.xlab="Pearson Correlation",
xlab="Feature", ylab="Feature")

EFA
EFAdataframe <- dataframeScaled
if (length(iscontinous) < 2000)
{
topred <- min(length(iscontinous),nrow(dataframeScaled),ncol(predPCA)-1)
if (topred < 2) topred <- 2
uls <- fa(dataframeScaled[,iscontinous],nfactors=topred,rotate="varimax",warnings=FALSE) # EFA analysis
predEFA <- predict(uls,dataframeScaled[,iscontinous])
EFAdataframe <- as.data.frame(cbind(predEFA,dataframeScaled[,!iscontinous]))
colnames(EFAdataframe) <- c(colnames(predEFA),colnames(dataframeScaled)[!iscontinous])
EFACor <- cor(EFAdataframe[,colnames(EFAdataframe) != outcome])
gplots::heatmap.2(abs(EFACor),
trace = "none",
# scale = "row",
mar = c(5,5),
col=rev(heat.colors(5)),
main = "EFA Correlation",
cexRow = 0.5,
cexCol = 0.5,
srtCol=45,
srtRow= -45,
key.title=NA,
key.xlab="Pearson Correlation",
xlab="Feature", ylab="Feature")
}
Effect on CAR
modeling
par(op)
par(xpd = TRUE)
dataframe[,outcome] <- factor(dataframe[,outcome])
rawmodel <- rpart(paste(outcome,"~."),dataframe,control=rpart.control(maxdepth=3))
pr <- predict(rawmodel,dataframe,type = "class")
ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
if (length(unique(pr))>1)
{
plot(rawmodel,main="Raw",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
text(rawmodel, use.n = TRUE,cex=0.75)
ptab <- epiR::epi.tests(table(pr==0,dataframe[,outcome]==0))
}

pander::pander(table(dataframe[,outcome],pr))
pander::pander(ptab$detail[c(5,3,4,6),])
| 5 |
diag.ac |
0.917 |
0.864 |
0.954 |
| 3 |
se |
0.825 |
0.701 |
0.913 |
| 4 |
sp |
0.964 |
0.910 |
0.990 |
| 6 |
diag.or |
125.725 |
37.521 |
421.276 |
par(op)
par(xpd = TRUE)
DEdataframe[,outcome] <- factor(DEdataframe[,outcome])
IDeAmodel <- rpart(paste(outcome,"~."),DEdataframe[,c(outcome,varlistcV)],control=rpart.control(maxdepth=3))
pr <- predict(IDeAmodel,DEdataframe,type = "class")
ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
if (length(unique(pr))>1)
{
plot(IDeAmodel,main="ILAA",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
text(IDeAmodel, use.n = TRUE,cex=0.75)
ptab <- epiR::epi.tests(table(pr==0,DEdataframe[,outcome]==0))
}

pander::pander(table(DEdataframe[,outcome],pr))
pander::pander(ptab$detail[c(5,3,4,6),])
| 5 |
diag.ac |
0.917 |
0.864 |
0.954 |
| 3 |
se |
0.895 |
0.785 |
0.960 |
| 4 |
sp |
0.928 |
0.863 |
0.968 |
| 6 |
diag.or |
109.438 |
36.051 |
332.214 |
par(op)
par(xpd = TRUE)
PCAdataframe[,outcome] <- factor(PCAdataframe[,outcome])
PCAmodel <- rpart(paste(outcome,"~."),PCAdataframe,control=rpart.control(maxdepth=3))
pr <- predict(PCAmodel,PCAdataframe,type = "class")
ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
if (length(unique(pr))>1)
{
plot(PCAmodel,main="PCA",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
text(PCAmodel, use.n = TRUE,cex=0.75)
ptab <- epiR::epi.tests(table(pr==0,PCAdataframe[,outcome]==0))
}

pander::pander(table(PCAdataframe[,outcome],pr))
pander::pander(ptab$detail[c(5,3,4,6),])
| 5 |
diag.ac |
0.804 |
0.735 |
0.861 |
| 3 |
se |
0.526 |
0.390 |
0.660 |
| 4 |
sp |
0.946 |
0.886 |
0.980 |
| 6 |
diag.or |
19.444 |
7.347 |
51.459 |
par(op)
EFA
EFAdataframe[,outcome] <- factor(EFAdataframe[,outcome])
EFAmodel <- rpart(paste(outcome,"~."),EFAdataframe,control=rpart.control(maxdepth=3))
pr <- predict(EFAmodel,EFAdataframe,type = "class")
ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
if (length(unique(pr))>1)
{
plot(EFAmodel,main="EFA",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
text(EFAmodel, use.n = TRUE,cex=0.75)
ptab <- epiR::epi.tests(table(pr==0,EFAdataframe[,outcome]==0))
}

pander::pander(table(EFAdataframe[,outcome],pr))
pander::pander(ptab$detail[c(5,3,4,6),])
| 5 |
diag.ac |
0.917 |
0.864 |
0.954 |
| 3 |
se |
0.825 |
0.701 |
0.913 |
| 4 |
sp |
0.964 |
0.910 |
0.990 |
| 6 |
diag.or |
125.725 |
37.521 |
421.276 |
par(op)